#!/usr/bin/python
# This script downloads FASTA files from the NCBI popset database.
# Usage: python downloadPopset.py pop.list
# Note: pop.list should contain one popset UID per line.


import sys
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio import Entrez

# Contact address reported to NCBI with every E-utilities request.
ENTREZ_EMAIL = "jimmysaw@gmail.com"


def read_ids(path):
    """Return the non-blank, whitespace-stripped lines of the file at *path*.

    Blank lines are skipped so that they never turn into an efetch call with
    an empty id or an output file named ".popset.fasta".
    """
    # Default text mode already translates all newline styles; the legacy
    # "rU" flag is rejected by open() on Python 3.11 and later.
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [uid for uid in stripped if uid]


def split_ncbi_header(description):
    """Split a pipe-delimited FASTA header into (accession, title).

    The expected layout is five '|'-separated fields, e.g.
    "gi|123|gb|AF000001.1| Homo sapiens ...": the fourth field is returned
    as the accession and the fifth (whitespace-stripped) as the title.
    The split is limited to four separators so a title that itself contains
    '|' is kept whole.

    Returns None when the header has fewer than five fields, so the caller
    can fall back to the record as parsed instead of failing on an index.
    """
    fields = description.split("|", 4)
    if len(fields) < 5:
        return None
    return fields[3], fields[4].strip()


def fetch_popset(uid):
    """Download popset *uid* from NCBI and return its sequences as a list.

    Records whose header is pipe-delimited are rebuilt with the accession as
    id and the title as description; any other record is kept unchanged.
    """
    handle = Entrez.efetch(db="popset", id=uid, rettype="fasta", retmode="text")
    records = []
    try:
        for record in SeqIO.parse(handle, "fasta"):
            parsed = split_ncbi_header(record.description)
            if parsed is None:
                records.append(record)
            else:
                accession, title = parsed
                records.append(
                    SeqRecord(record.seq, id=accession, description=title)
                )
    finally:
        # Release the network handle even if parsing fails part-way.
        handle.close()
    return records


def main(argv):
    """Write "<uid>.popset.fasta" for every UID listed in the file argv[1]."""
    if len(argv) < 2:
        sys.exit("Usage: python downloadPopset.py pop.list")
    Entrez.email = ENTREZ_EMAIL
    for uid in read_ids(argv[1]):
        SeqIO.write(fetch_popset(uid), uid + ".popset.fasta", "fasta")


if __name__ == "__main__":
    main(sys.argv)
